
// ---------------------------------------------------------------------------
// Working directory and data load
// ---------------------------------------------------------------------------
// Try the author's personal project folder first. The cd is wrapped in
// capture so that the script keeps running on machines where that folder does
// not exist; in that case the relative paths below are resolved against the
// directory Stata is currently in.
capture cd "~/Dropbox/Education Chile/Replication"
if _rc != 0 {
    display as text "note: default project folder not found; staying in " c(pwd)
}

// NOTE(review): "dirname" does not look like a valid extended macro function,
// and c(filename) refers to the last dataset used or saved, not to this
// do-file. The capture below therefore presumably always fails and the
// fallback cd never runs -- confirm and replace or remove.
capture local here : dirname c(filename)
if _rc==0 cd "`here'/../../.."

// Input folder (relative to the working directory) and output folder.
global DATA "Data"
global OUT  "$DATA"

use "$DATA/base_consolidada_1994_2023_v3.dta", clear

// Survey waves present in the data: year by month.
tab encuesta_a encuesta_m

// Birth year is approximated as survey year minus reported age; it is used
// further down to place respondents relative to the coup.
gen dob = encuesta_a - edad
* keep if dob >= 1950 & dob <= 1984 // first cut 


// 51,356 obs

// Inspect the raw items, then blank out negative codes of democracia_19.
foreach item in iden_pol_2 democracia_19 {
    codebook `item', tab(100)
}
replace democracia_19 = . if democracia_19 < 0

// 0/1 indicators built from the self-placement item iden_pol_2.
// Each one is defined only where iden_pol_2 is not system missing, and is 1
// when the answer falls in the stated range.
gen right_hard = inrange(iden_pol_2, 9, 10) if iden_pol_2 != .
gen right_soft = inrange(iden_pol_2, 7, 8)  if iden_pol_2 != .
gen right      = inrange(iden_pol_2, 7, 10) if iden_pol_2 != .

gen left       = inrange(iden_pol_2, 1, 4)  if iden_pol_2 != .
gen left_hard  = inrange(iden_pol_2, 1, 2)  if iden_pol_2 != .
gen left_soft  = inrange(iden_pol_2, 3, 4)  if iden_pol_2 != .

gen center     = inlist(iden_pol_2, 5, 6)   if iden_pol_2 != .

// Answers coded below 1 are treated as "no position".
gen none_pol   = (iden_pol_2 < 1)           if iden_pol_2 != .

// Three-way position: 1 left, 2 center, 3 right; missing otherwise.
gen pos_pol = cond(right == 1, 3, cond(center == 1, 2, cond(left == 1, 1, .)))

label define p 1 "Left" 2 "Center" 3 "Right"
label values pos_pol p

*tab  right dob, col nofreq
*tab  right_hard dob, col nofreq
*tab  left dob, col nofreq // interesting
*tab  left_hard dob, col nofreq // interesting
*tab  center dob, col nofreq // interesting
*tab  none_pol dob, col nofreq // interesting

// Scale version of the item: values below 1 and system missing become missing.
gen ideology = iden_pol_2 if iden_pol_2 >= 1 & iden_pol_2 != .

// Untouched copy of the raw item.
gen ideology_original = iden_pol_2 

// treatment dosage

// rule repression out, etc.

// Dosage is the birth year measured relative to 1955: -10 for dob 1945 up to
// +10 for dob 1965. Birth years outside 1945-1965 keep a missing dosage.
gen treatment_dosage_coup = .
forvalues yr = 1945/1965 {
    replace treatment_dosage_coup = `yr' - 1955 if dob == `yr'
}

// so far: 1973 as control --> perhaps eliminate --> first strategy

// The reference cohort itself (dosage 0) is removed from the data.
drop if dob == 1955

// Placebo dosage: the same scale shifted by four years.
gen treatment_dosage_placebo = treatment_dosage_coup - 4


// stricter def of treatment_dosage

// treat1 = 1 for dosage >= 1 and 0 for dosage <= 0.
// Stata treats missing as larger than any number, so an unguarded
// "dosage >= 1" would also flag every respondent outside the 1945-1965
// window as treated. The !missing() condition leaves treat1 missing there.
gen treat1 = (treatment_dosage_coup >= 1) if !missing(treatment_dosage_coup)

// years of education

tab esc

// Collapse years of schooling (esc) into six ordered levels. The level is
// left missing when esc is system missing or falls in none of the brackets.
gen ed_levels = cond(esc < 8, 1,                        ///
                cond(esc == 8, 2,                       ///
                cond(esc > 8 & esc <= 11, 3,            ///
                cond(esc == 12, 4,                      ///
                cond(esc > 12 & esc <= 16, 5,           ///
                cond(esc >= 17, 6, .)))))) if esc != .


label define ed1                ///
    1 "Less than MS"            ///
    2 "Middle School"           ///
    3 "Less than HS"            ///
    4 "High School"             ///
    5 "Less than College"       ///
    6 "College or more"
label values ed_levels ed1
* tab  ed_levels dob, col nofreq


// crucial: sample are people who at least finished high school
* keep if ed_levels >= 4

// for mechanisms --> same model of people who did not finish hs

// other def of treatment: 4 years --> at least high school 

// treat2 = 1 for dosage >= 4 (14 in 1973, HS with pinochet), 0 for dosage <= 3.
// Missing compares as larger than any number in Stata, so the !missing()
// condition is needed to keep respondents without a dosage out of the
// treated group; treat2 stays missing for them.
gen treat2 = (treatment_dosage_coup >= 4) if !missing(treatment_dosage_coup)

/* 2 definitions of treatment:

1. At least one year with pinochet: 17 at coup --> treat1
2. At least HS with pinochet: 14 at coup --> treat2

*/

// other outcome

// One 0/1 indicator per observed category of democracia_21, named dem_1,
// dem_2, ... in the sorted order of the values present in the data.
// NOTE(review): which answer dem_3 corresponds to depends on the categories
// actually observed (e.g. whether negative codes are present) -- confirm.
tab democracia_21, gen(dem_)
* tab  dem_3 dob, col nofreq

// dem_3: democracy is always preferable (author's note)


// Remove survey modules not used below. This includes every variable whose
// name starts with iden, so iden_pol_2 is gone after this point; the ideology
// variables derived from it earlier are kept.
drop anomia* bienestar* cambio_hora* constitucion* corrupcion* educacion* elec* estallido*  eval* iden* impuestos* info* internacional* medio_ambiente* mtf* municipio* pensiones* pobreza* pp* redes* salud* seguridad* terremoto* tiempo_libre* trabajo* transporte* vivienda* mapuche*

drop ciudadania* confianza*
drop dictadura* // not enough N

* outcomes: dem 20 and dem 19

// The "a - b" ranges below drop every variable stored between a and b in the
// dataset's current variable order, so these two lines depend on that order
// and on being run in this sequence.
drop democracia_10 - democracia_18_c democracia_22_a democracia_35_k democracia_4_a - democracia_7
drop democracia_22_b - democracia_35_j democracia_8_a democracia_8_b democracia_9

// Give the political-interest items short names first; the wildcard drop
// that follows then removes only the interes_pol items that were not renamed.
rename interes_pol_1_a pol_1
rename interes_pol_1_b pol_2
rename interes_pol_2_a pol_tv
rename interes_pol_2_b pol_news
rename interes_pol_2_c pol_social_media
rename interes_pol_2_d pol_talk_family
rename interes_pol_2_e pol_talk_friends
rename interes_pol_2_f pol_convince
rename interes_pol_2_g pol_work

drop interes_pol*
drop rol_gobierno*
drop percepcion*
drop polarizacion*
drop democracia_36
drop rotacion*

// 

// Second dosage scale, measured relative to the 1972 birth cohort:
// -5 for dob 1967 up to +8 for dob 1980. Other birth years stay missing.
gen treatment_dosage_dem = .
forvalues yr = 1967/1980 {
    replace treatment_dosage_dem = `yr' - 1972 if dob == `yr'
}

// treat_dem = 1 for dosage >= 1 and 0 for dosage <= 0.
// The !missing() condition matters: Stata treats missing as larger than any
// number, so without it every respondent outside the 1967-1980 window would
// be coded as treated. Those respondents now keep a missing treat_dem.
gen treat_dem = (treatment_dosage_dem >= 1) if !missing(treatment_dosage_dem)

// outcome 

// 1 when democracia_21 is answered 2 or 3, 0 for any other non-missing answer.
gen authoritarian = inlist(democracia_21, 2, 3) if democracia_21 != .

// Outcomes

codebook pol_1 pol_2

// Single interest-in-politics item: pol_1 where available, otherwise pol_2;
// values below 1 are then set to missing.
gen pol_all = cond(pol_1 == ., pol_2, pol_1)
replace pol_all = . if pol_all <1

// dummy variable

// 1 when pol_all is 1 or 2, 0 for any other non-missing value.
gen pol_interested = inlist(pol_all, 1, 2) if pol_all != .

// outcome: frequently dummies

// One indicator per observed category for each frequency item
// (pol_tv_1, pol_tv_2, ... and so on for every item in the list).
local freq_items pol_tv pol_news pol_social_media pol_talk_family ///
                 pol_talk_friends pol_convince pol_work
foreach item of varlist `freq_items' {
    tab `item', gen(`item'_)
}

// how democracy works

replace democracia_20 = . if democracia_20 <0

// 1 when democracia_20 is 1 or 2, 0 for any other non-missing value.
gen democracy_bad = inlist(democracia_20, 1, 2) if democracia_20 != .

// Region: first non-missing of region_1, region_2, region_3 (in that order).
gen region_all = cond(region_1 != ., region_1, cond(region_2 != ., region_2, region_3))


drop religion_10_a - religion_9

// One indicator per observed category of religion_1 (catholic_1, catholic_2, ...).
tab religion_1, gen(catholic_)

// Recode system missing to 99 in the covariates so that it becomes a
// category of its own from here on.
foreach covar of varlist sexo edad esc gse zona_u_r {
    replace `covar' = 99 if `covar' ==.
}

// Survey period: 1 for waves 1994-2014, 2 for waves 2015-2023.
gen decade = cond(inrange(encuesta_a, 1994, 2014), 1, ///
             cond(inrange(encuesta_a, 2015, 2023), 2, .))


// Flags coded 1 / missing (never 0): right_gov marks the waves 2010-2013 and
// 2018-2021, left_gov marks every other observation.
gen right_gov = 1 if inrange(encuesta_a, 2010, 2013) | inrange(encuesta_a, 2018, 2021)

gen left_gov = 1 if missing(right_gov)

* balance whole sample

// Category indicators used by the proportion tests (ed_levels_1, gse_1, ...).
foreach covar of varlist ed_levels gse  sexo zona_u_r {
	tab `covar', gen(`covar'_)
}

// Sample windows, expressed on the coup dosage scale.
local near "inrange(treatment_dosage_coup, -1, 1)"
local wide "treatment_dosage_coup <=5"

// Indicators tested in both windows.
local props gse_1 gse_2 gse_3 gse_4 gse_5 sexo_2 zona_u_r_1

// Two-sample tests by treat1 within one year of the cutoff.
foreach p in ed_levels_1 ed_levels_2 ed_levels_3 `props' {
    prtest `p' if `near', by(treat1)
}
ttest edad if `near', by(treat1)

// Same proportion tests for every cohort with dosage up to 5.
foreach p of local props {
    prtest `p' if `wide', by(treat1)
}

// Cross-tabulations with chi-squared tests: first the wide window
// (gonzalez et al paper), then the 1-year bandwidth.
foreach win in wide near {
    tab  ed_levels treat1 if ``win'', col nofreq chi2
    tab  gse treat1 if ``win'', col nofreq chi2
    tab  sexo treat1 if ``win'', col chi2
    tabstat  edad if ``win'', by(treat1)
    ttest  edad if ``win'', by(treat1)
    tab  zona_u_r treat1 if ``win'', col nofreq chi2
}

* Balance 2 and 3 years of bw (column shares only, no tests)

forvalues h = 2/3 {
    local band "inrange(treatment_dosage_coup, -`h', `h')"
    tab  ed_levels treat1 if `band', col nofreq
    tab  gse treat1 if `band', col nofreq
    tab  sexo treat1 if `band', col nofreq
    tabstat  edad if `band', by(treat1)
    tab  zona_u_r treat1 if `band', col nofreq
}

// One indicator per education level (ed1 ... ed6 when all levels are present).
tab ed_levels, gen(ed)

// More than high school: levels 5 and 6. Left missing when the education
// level itself is unknown, instead of silently coding those cases as 0.
gen more_hs = inlist(ed_levels, 5, 6) if !missing(ed_levels)

// Cluster identifier: one group per combination of encuesta and birth year.
egen cluster = group(encuesta dob)

// mechanisms: life trajectories --> gonzalez et al.

// create different data for other subset --> less than HS

// 1 when age is above 55, 0 otherwise. Missing ages were recoded to 99
// earlier in this file, so those cases also get 1 here.
gen edad55 = edad > 55

// Macro-region flags, coded 1 / missing (never 0).
// NOTE(review): because these are never 0, the tabulations below show a
// single column and the flags cannot serve as 0/1 regressors. They are saved
// to the output file, so the coding is left unchanged here -- confirm how
// downstream code uses them before switching to 0/1.
gen region_rm = 1 if region_all == 13 

gen region_north = 1 if inrange(region_all, 1, 5) | region_all == 15

gen region_south = 1 if inrange(region_all, 6, 12) | inlist(region_all, 14, 16)

tab ed_levels region_rm, col
tab ed_levels region_north, col
tab ed_levels region_south, col


// Dosage

tab treatment_dosage_coup

// Nested sample flags (1 / missing): sample_dosageK marks dosages from -1 up
// to K, for K = 1..5.
forvalues k = 1/5 {
    gen sample_dosage`k' = 1 if treatment_dosage_coup >= -1 & treatment_dosage_coup <= `k'
}


tab treatment_dosage_coup

tab left

tab ideology

tab treatment_dosage_coup sexo, row 

// Final analysis sample:
//   1. the ideology item was asked (left is defined),
//   2. dosage within five years of the cutoff,
//   3. at least high school completed.
// Stata treats missing as larger than any number, so "ed_levels >= 4" alone
// would also keep respondents whose education level is unknown; they are
// excluded explicitly.
keep if left !=.
keep if treatment_dosage_coup >= -5 & treatment_dosage_coup <=5
keep if ed_levels >= 4 & !missing(ed_levels)

tab encuesta_a encuesta_m

// Variables written to the analysis file, in the order they should appear.
local keepvars ///
    encuesta encuesta_a encuesta_m ///
    dob edad sexo sexo_2 zona_u_r gse ed_levels edad55 ///
    region_rm region_north region_south ///
    treatment_dosage_coup treat1 ///
    left right center none_pol left_hard left_soft ///
    dem_3 democracy_bad ///
    cluster

// Keep only the requested variables that actually exist in memory.
ds
local present `r(varlist)'
local keep_present : list keepvars & present

// Report requested variables that are absent instead of skipping them
// silently, so an incomplete output file does not go unnoticed.
local absent : list keepvars - present
if "`absent'" != "" {
    display as error "warning: requested variables not found, not saved: `absent'"
}

keep `keep_present'

// Order by the same filtered list: ordering by the full hard-coded list would
// stop with an error for any absent variable, defeating the filtering above.
order `keep_present'
compress

save "$OUT/cep_all.dta", replace
